Kowalczyk et al. 2015 Cell cycle genes based on Kowalczyk et al. 2015, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4665007/. Cell cycle is a main source of transcriptional variation between HSCs and cycling cells are more abundand in MPPs compared to LT- and ST-HSCs. 2708 genes, ~32% were significant cell cycle-dependent expression. Of those 92% were up-regulated in cycling versus noncycling cells. 8% were upregulated in noncycling versus cycling cells included some important genes for HSC quiescence which we should not filter out (Stat1, Stat3, Meis1, Pbx1, Klf6, Nfia). G1/S, S, G2, and G2/M phases defined based on functional annotations (The Reference Genome Group of the Gene Ontology Consortium 2009). G1 length varies widely among different cell types, is short specifically in self-renewing cells, and increases with differentiation.
options(warn = -1)
# Seurat
library(Seurat)
# Data
library(dplyr)
# library(reshape2)
# library(Matrix)
# Plotting
library(ggplot2)
library(ComplexHeatmap)
library(gridExtra)
library(patchwork)
library(RColorBrewer)
library(viridis)
Attaching SeuratObject
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Loading required package: grid
========================================
ComplexHeatmap version 2.8.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
genomic data. Bioinformatics 2016.
The new InteractiveComplexHeatmap package can directly export static
complex heatmaps into an interactive Shiny app with zero effort. Have a try!
This message can be suppressed by:
suppressPackageStartupMessages(library(ComplexHeatmap))
========================================
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
Loading required package: viridisLite
# Set working directory to project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")
# Source files
source("plotting_global.R")
# Filtering Parameter
so_file <- "data/object/seurat_l.rds"
# Plotting Theme
ggplot2::theme_set(theme_global_set()) # From project global source()
so_l <- readRDS(so_file)
library(msigdbr)
go_bp = msigdbr(species = "mouse", category = "C5", subcategory = "BP")
go_bp_ccp <- go_bp %>%
dplyr::filter(gs_exact_source == "GO:0022402") %>%
dplyr::select(gene_symbol, ensembl_gene, gs_exact_source)
pc_annotation <- function(so, dims = 25, loadings = 100, features = go_bp_ccp$gene_symbol) {
pca_loadings <- Loadings(so, reduction = "pca")[, 1:dims]
pca_loadings_genes <- list()
for(pc in colnames(pca_loadings)) {
df <- data.frame(
pca_loadings_genes = names(rev(sort(abs(pca_loadings[, pc])))[1:loadings]),
pc = pc,
sample_group = unique(so$sample_group)
)
df$cc_gene <- ifelse(df$pca_loadings_genes %in% features, "Cell cycle process", "Other")
pca_loadings_genes[[pc]] <- df
}
pca_loadings_genes <- do.call("rbind", pca_loadings_genes)
pca_loadings_genes$pc <- factor(pca_loadings_genes$pc, levels = paste0("PC_", 1:dims))
return(pca_loadings_genes)
}
pc_annotation_cc <- lapply(so_l, pc_annotation)
pc_annotation_cc <- do.call("rbind", pc_annotation_cc)
options(repr.plot.width = 10, repr.plot.height = 5)
ggplot(pc_annotation_cc, aes(x = pc, fill = cc_gene)) +
geom_bar(stat = "count", position = "fill") +
facet_wrap(~sample_group, ncol = 2) +
ggtitle("Cell cycle label frequency in top PC loadings") + xlab("") + ylab("Frequency") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
cc_whitfield <- read.csv("cc_whitfield.csv")
# Select and rename columns
cc_whitfield <- unique(cc_whitfield[, c("LLID", "PHASE")])
colnames(cc_whitfield) <- c("entrezgene_id", "cc_phase")
# Use only unique entrenzgene_id
cc_whitfield <- cc_whitfield[!cc_whitfield$entrezgene_id %in% cc_whitfield$entrezgene_id[duplicated(cc_whitfield$entrezgene_id)], ]
# Select cell cycle phase of interest
cc_whitfield$cc_phase <- gsub("S phase", "S", cc_whitfield$cc_phase)
cc_whitfield <- cc_whitfield[cc_whitfield$cc_phase %in% c("G1/S", "S", "G2", "G2/M"), ]
# Convert human entrenz gene ID to human symbol
library(biomaRt)
ensembl_hgnc <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
symbol_hgnc <- getBM(attributes = c("entrezgene_id", "hgnc_symbol"), filters = "entrezgene_id", values = cc_whitfield$entrezgene_id, mart = ensembl_hgnc)
# Convert human symbol to mouse symbol
ensembl_mm <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
symbol_mm <- getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = symbol_hgnc$hgnc_symbol, mart = ensembl_hgnc, attributesL = c("mgi_symbol"), martL = ensembl_mm, uniqueRows = T)
colnames(symbol_mm) <- c("hgnc_symbol", "mgi_symbol")
# Combine and filter conversion results
convert <- dplyr::left_join(symbol_hgnc, symbol_mm, by = "hgnc_symbol")
convert <- convert[!convert$entrezgene_id %in% convert$entrezgene_id[duplicated(convert$entrezgene_id)], ]
convert <- convert[!convert$mgi_symbol %in% convert$mgi_symbol[duplicated(convert$mgi_symbol)], ]
# Add mgi_symbol to whitfield cell cycle phase annotation
cc_whitfield <- dplyr::left_join(cc_whitfield, convert, by = "entrezgene_id")
# Remove entries without mouse gene symbol annotation
cc_whitfield <- na.omit(cc_whitfield)
# cc_whitfield <- cc_whitfield[cc_whitfield$mgi_symbol %in% rownames(so), ]
cc_genes_whitfield <- list(
g1s = cc_whitfield[cc_whitfield$cc_phase == "G1/S", ]$mgi_symbol,
s = cc_whitfield[cc_whitfield$cc_phase == "S", ]$mgi_symbol,
g2 = cc_whitfield[cc_whitfield$cc_phase == "G2", ]$mgi_symbol,
g2m = cc_whitfield[cc_whitfield$cc_phase == "G2/M", ]$mgi_symbol
)
The input to any clustering method, such as linkage, needs to measure the dissimilarity of objects. The correlation measures similarity. So it needs to be transformed in a way such that 0 correlation is mapped to a large number, while 1 correlation is mapped to 0. This is done by computing the euclidean distance (dissimilarity) of 1-R correlation values.
Hierarchical Clustering
Agglomerative : An agglomerative approach begins with each observation in a distinct (singleton) cluster, and successively merges clusters together until a stopping criterion is satisfied.
Divisive : A divisive method begins with all patterns in a single cluster and performs splitting until a stopping criterion is met.
# Compute correlation between gene sets
module_corr <- function(so, genes) {
#' Compute correlation matrix from Seurat object for gene moduls
#'
#' @param so Seurat object.
#' @param genes List of genes.
cnt <- GetAssayData(so, assay = "RNA", slot = "data")
module_corr <- list()
for(module in names(genes)) {
cnt_module <- cnt[rownames(cnt) %in% genes[[module]], ]
cnt_module <- t(as.matrix(cnt_module))
corr <- cor(cnt_module, method = "pearson", use = "complete.obs")
corr[is.na(corr)] <- 0
module_corr[[module]] <- corr
}
return(module_corr)
}
# Compute hierarchical clustering of R distance matrix
module_corr_clust <- function(module_corr) {
#' Compute hierarchical cluster from correlation matrix with dissimilarity euclidean 1-R distance matrix
#'
#' @param module_corr List of correlation matrix for gene sets.
h_cluster <- list()
for(module in names(module_corr)) {
corr_dist <- dist(module_corr[[module]], method = "euclidean")
h_cluster[[module]] <- hclust(corr_dist, method = "complete")
}
return(h_cluster)
}
# Compute heatmap of correlation scores based on hierarchical clustering of 1-R dissimilarity
module_corr_hm <- function(module_corr, module_corr_clust, title = "Heat map") {
#' Compute heatmap based on correlation and hierarchical clustering
#'
#' @param module_corr List of correlation matrix for gene sets.
#' @param module_corr_clust List of hierarchical cluster corresponding to module_corr.
module_corr_hm <- list()
for(module in names(module_corr)) {
# Compute dendrogram from hierarchical clustering
dend <- as.dendrogram(module_corr_clust[[module]])
color_break <- seq(-1, 1, 0.01)
# color_ramp <- viridis(length(color_break), option = "plasma")
color_ramp<- colorRampPalette(c("blue", "white", "red"))(100)
hm <- grid.grabExpr(draw(ComplexHeatmap::pheatmap(
mat = module_corr[[module]],
main = title,
fontsize_row = 10,
scale = "none",
cluster_rows = dend,
cluster_cols = dend,
cellwidth = 2.5,
cellheight = 2.5,
treeheight_row = 15,
treeheight_col = 15,
show_rownames = FALSE,
show_colnames = FALSE,
row_split = 2,
column_split = 2,
color = color_ramp,
breaks = color_break,
border_color = NA)))
module_corr_hm[[module]] <- hm
}
return(module_corr_hm)
}
module_corr_cc <- lapply(so_l, module_corr, genes = cc_genes_whitfield)
module_corr_clust_cc <- lapply(module_corr_cc, module_corr_clust)
module_corr_hm_cc <- mapply(module_corr_hm, module_corr_cc, module_corr_clust_cc, title = as.list(names(module_corr_cc)), SIMPLIFY = FALSE)
module_corr_hm_g1s <- lapply(module_corr_hm_cc, "[[", 1)
module_corr_hm_s <- lapply(module_corr_hm_cc, "[[", 2)
module_corr_hm_g2 <- lapply(module_corr_hm_cc, "[[", 3)
module_corr_hm_g2m <- lapply(module_corr_hm_cc, "[[", 4)
options(repr.plot.width = 25, repr.plot.height = 7.5)
do.call(grid.arrange, c(module_corr_hm_g1s, ncol = 4, top = "G1S phase genes"))
do.call(grid.arrange, c(module_corr_hm_s, ncol = 4, top = "S phase genes"))
do.call(grid.arrange, c(module_corr_hm_g2, ncol = 4, top = "GS phase genes"))
do.call(grid.arrange, c(module_corr_hm_g2m, ncol = 4, top = "G2M phase genes"))
cc_genes_whitfield <- list(
Myeloid_CpG = list(s = cc_whitfield[cc_whitfield$cc_phase == "S", ]$mgi_symbol, g2m = cc_whitfield[cc_whitfield$cc_phase == "G2/M", ]$mgi_symbol),
Progenitor_CpG =list(s = cc_whitfield[cc_whitfield$cc_phase == "S", ]$mgi_symbol, g2m = cc_whitfield[cc_whitfield$cc_phase == "G2/M", ]$mgi_symbol),
Myeloid_NaCl = list(s = cc_whitfield[cc_whitfield$cc_phase == "S", ]$mgi_symbol, g2m = cc_whitfield[cc_whitfield$cc_phase == "G2/M", ]$mgi_symbol),
Progenitor_NaCl =list(s = cc_whitfield[cc_whitfield$cc_phase == "S", ]$mgi_symbol, g2m = cc_whitfield[cc_whitfield$cc_phase == "G2/M", ]$mgi_symbol)
)
cc_genes_seurat_s <- cc.genes.updated.2019$s.genes
cc_genes_seurat_g2m <- cc.genes.updated.2019$g2m.genes
# Translate human to mouse
cc_genes_seurat_s <- getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_genes_seurat_s, mart = ensembl_hgnc, attributesL = c("mgi_symbol"), martL = ensembl_mm, uniqueRows = T)
cc_genes_seurat_s <- cc_genes_seurat_s[, 2]
cc_genes_seurat_g2m <- getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = cc_genes_seurat_g2m, mart = ensembl_hgnc, attributesL = c("mgi_symbol"), martL = ensembl_mm, uniqueRows = T)
cc_genes_seurat_g2m <- cc_genes_seurat_g2m[, 2]
module_corr_cc <- lapply(so_l, module_corr, genes = list(s = cc_genes_seurat_s, g2m = cc_genes_seurat_g2m))
module_corr_clust_cc <- lapply(module_corr_cc, module_corr_clust)
module_corr_hm_cc <- mapply(module_corr_hm, module_corr_cc, module_corr_clust_cc, title = as.list(names(module_corr_cc)), SIMPLIFY = FALSE)
module_corr_hm_s <- lapply(module_corr_hm_cc, "[[", 1)
module_corr_hm_g2m <- lapply(module_corr_hm_cc, "[[", 2)
options(repr.plot.width = 25, repr.plot.height = 7.5)
do.call(grid.arrange, c(module_corr_hm_s, ncol = 4, top = "S phase genes"))
do.call(grid.arrange, c(module_corr_hm_g2m, ncol = 4, top = "G2M phase genes"))
cc_genes_seurat <- list(
Myeloid_CpG = list(s = cc_genes_seurat_s, g2m = cc_genes_seurat_g2m),
Progenitor_CpG =list(s = cc_genes_seurat_s, g2m = cc_genes_seurat_g2m),
Myeloid_NaCl = list(s = cc_genes_seurat_s, g2m = cc_genes_seurat_g2m),
Progenitor_NaCl =list(s = cc_genes_seurat_s, g2m = cc_genes_seurat_g2m)
)
cc_module <- function(so, cc_genes, index = "") {
cc_phase_class <- paste0("cc_phase_class", "_", index)
msS_RNA <- paste0("msS_RNA", "_", index)
msG2M_RNA <- paste0("msG2M_RNA", "_", index)
msCC_diff_RNA = paste0("msCC_diff_RNA_", index)
s_feature <- cc_genes[["s"]]
g2m_feature <- cc_genes[["g2m"]]
so <- CellCycleScoring(so, s.features = s_feature, g2m.features = g2m_feature, set.ident = FALSE)
colnames(so@meta.data) <- gsub("Phase", cc_phase_class, colnames(so@meta.data))
colnames(so@meta.data) <- gsub("S.Score", msS_RNA, colnames(so@meta.data))
colnames(so@meta.data) <- gsub("G2M.Score", msG2M_RNA, colnames(so@meta.data))
so[[msCC_diff_RNA]] <- so[[msS_RNA]] - so[[msG2M_RNA]]
so[[cc_phase_class]] <- factor(so[[cc_phase_class, drop = TRUE]], levels = names(color$cc_phase_class))
return(so)
}
so_l <- mapply(cc_module, so_l, cc_genes_whitfield, index = "w", SIMPLIFY = FALSE)
so_l <- mapply(cc_module, so_l, cc_genes_seurat, index = "s", SIMPLIFY = FALSE)
options(repr.plot.width = 10, repr.plot.height = 10)
cc_plot <- function(so) {
cc_plot <- ggplot(so@meta.data, aes(x = msS_RNA_w, y = msG2M_RNA_w, color = cc_phase_class_w)) +
geom_point() +
geom_vline(aes(xintercept = 0), color = "black", linetype = "longdash") +
geom_hline(aes(yintercept = 0), color = "black", linetype = "longdash") +
ggtitle("Cell cycle phase") + xlab("S-phase [module score]") + ylab("G2M [module score]") +
scale_color_manual(values = color$cc_phase_class) +
facet_grid(tissue~treatment) +
theme(aspect.ratio = 1)
return(cc_plot)
}
cc_plot_l <- lapply(so_l, cc_plot)
cc_plot <- cc_plot_l[[1]] + cc_plot_l[[2]] + cc_plot_l[[3]] + cc_plot_l[[4]]
cc_plot
ggsave(cc_plot, filename = "cc_plot_w.png", width = 10, height = 10)
options(repr.plot.width = 10, repr.plot.height = 10)
cc_plot <- function(so) {
cc_plot <- ggplot(so@meta.data, aes(x = msS_RNA_s, y = msG2M_RNA_s, color = cc_phase_class_s)) +
geom_point() +
geom_vline(aes(xintercept = 0), color = "black", linetype = "longdash") +
geom_hline(aes(yintercept = 0), color = "black", linetype = "longdash") +
ggtitle("Cell cycle phase") + xlab("S-phase [module score]") + ylab("G2M [module score]") +
scale_color_manual(values = color$cc_phase_class) +
facet_grid(tissue~treatment) +
theme(aspect.ratio = 1)
return(cc_plot)
}
cc_plot_l <- lapply(so_l, cc_plot)
cc_plot <- cc_plot_l[[1]] + cc_plot_l[[2]] + cc_plot_l[[3]] + cc_plot_l[[4]]
cc_plot
ggsave(cc_plot, filename = "cc_plot_s.png", width = 10, height = 10)
options(repr.plot.width = 15, repr.plot.height = 10)
cc_plot <- function(so) {
cc_plot <- ggplot(so@meta.data, aes(x = msS_RNA_w, y = msG2M_RNA_w, color = msCC_diff_RNA_w)) +
geom_point() +
geom_vline(aes(xintercept = 0), color = "black", linetype = "longdash") +
geom_hline(aes(yintercept = 0), color = "black", linetype = "longdash") +
ggtitle("Cell cycle phase") + xlab("S-phase [module score]") + ylab("G2M [module score]") +
facet_grid(tissue~treatment+sample_rep) +
theme(aspect.ratio = 1)
return(cc_plot)
}
cc_plot_l <- lapply(so_l, cc_plot)
cc_plot_l[[1]] + cc_plot_l[[2]] + cc_plot_l[[3]] + cc_plot_l[[4]]
options(repr.plot.width = 15, repr.plot.height = 10)
cc_plot <- function(so) {
cc_plot <- ggplot(so@meta.data, aes(x = msS_RNA_s, y = msG2M_RNA_s, color = msCC_diff_RNA_s)) +
geom_point() +
geom_vline(aes(xintercept = 0), color = "black", linetype = "longdash") +
geom_hline(aes(yintercept = 0), color = "black", linetype = "longdash") +
ggtitle("Cell cycle phase") + xlab("S-phase [module score]") + ylab("G2M [module score]") +
facet_grid(tissue~treatment+sample_rep) +
theme(aspect.ratio = 1)
return(cc_plot)
}
cc_plot_l <- lapply(so_l, cc_plot)
cc_plot_l[[1]] + cc_plot_l[[2]] + cc_plot_l[[3]] + cc_plot_l[[4]]
options(repr.plot.width = 10, repr.plot.height = 5)
cc_genes_pca <- function(so) {
cnt <- GetAssayData(so, assay = "SCT", slot = "scale.data")
cnt <- cnt[rownames(cnt) %in% c(cc_genes_seurat_s, cc_genes_seurat_g2m), ]
pca <- prcomp(t(cnt), scale. = FALSE, center = FALSE)
eigs <- pca$sdev^2
pca_data <- cbind(pca$x, so@meta.data)
pca_1 <- ggplot(pca_data, aes(x = PC1, y = PC2, fill = cc_phase_class_s)) +
geom_point(color = "black", pch = 21, size = 3) +
ggtitle("PCA") +
xlab(paste("PC1 (", round(100 * eigs[1] / sum(eigs), digits = 1), " %)")) +
ylab(paste("PC2 (", round(100 * eigs[2] / sum(eigs), digits = 1), " %)")) +
scale_fill_manual(values = color$cc_phase_class) +
guides(fill = guide_legend(title = "Cell cycle phase")) +
theme(aspect.ratio = 1)
pca_2 <- ggplot(pca_data, aes(x = PC1, y = PC2, fill = sample_rep)) +
geom_point(color = "black", pch = 21, size = 3) +
ggtitle("PCA") +
xlab(paste("PC1 (", round(100 * eigs[1] / sum(eigs), digits = 1), " %)")) +
ylab(paste("PC2 (", round(100 * eigs[2] / sum(eigs), digits = 1), " %)")) +
# scale_fill_manual(values = color$cc_phase_class) +
guides(fill = guide_legend(title = "Cell cycle phase")) +
theme(aspect.ratio = 1)
pca_1 + pca_2
}
lapply(so_l, cc_genes_pca)
$Myeloid_CpG $Progenitor_CpG $Myeloid_NaCl $Progenitor_NaCl
so_l_cc <- lapply(so_l, function(so) SCTransform(so, verbose = FALSE, vars.to.regress = c("cc_phase_class_s"), assay = "RNA"))
lapply(so_l_cc, cc_genes_pca)
$Myeloid_CpG $Progenitor_CpG $Myeloid_NaCl $Progenitor_NaCl